import os
import sys

import pandas as pd
from pyspark.sql import SparkSession
from pyspark.sql.types import (
    FloatType,
    IntegerType,
    LongType,
    StringType,
    StructField,
    StructType,
)

# Make the Spark workers use the same Python interpreter as the driver;
# otherwise PySpark may pick up a different (or missing) python executable.
os.environ['PYSPARK_PYTHON'] = sys.executable
os.environ['PYSPARK_DRIVER_PYTHON'] = sys.executable

file_path = r"HTTP_20130313143750.csv"

# Read the CSV with pandas first. header=0 takes the first row as column
# names; thousands=" " strips spaces used as thousands separators so the
# numeric columns parse as numbers instead of strings.
pd_df = pd.read_csv(file_path, thousands=" ", header=0)
print(pd_df)
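
# Optional sanity check: the numeric columns should now be int64/float64
# rather than object if the thousands separator parsed correctly.
print(pd_df.dtypes)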

# Explicit schema for the Spark DataFrame: eleven generically named, nullable
# columns. Declaring it up front avoids Spark inferring types from the pandas
# dtypes.
schema = StructType([
    StructField("Column1", FloatType(), True),
    StructField("Column2", LongType(), True),
    StructField("Column3", StringType(), True),
    StructField("Column4", StringType(), True),
    StructField("Column5", StringType(), True),
    StructField("Column6", StringType(), True),
    StructField("Column7", IntegerType(), True),
    StructField("Column8", IntegerType(), True),
    StructField("Column9", IntegerType(), True),
    StructField("Column10", IntegerType(), True),
    StructField("Column11", IntegerType(), True),
])
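
# Note: createDataFrame verifies each value against this schema, so the
# declared types must match what pandas actually parsed (e.g., Column1 must
# hold floats for FloatType); a mismatch may raise a TypeError at conversion.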
spark = SparkSession.builder.getOrCreate()

# Convert the pandas frame into a Spark DataFrame under the explicit schema.
df = spark.createDataFrame(pd_df, schema=schema)
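
# Alternative sketch (an assumption, not part of the original script): let
# Spark read the CSV directly and skip the pandas step. Spark's CSV reader
# has no thousands-separator option, so this only works if the numeric
# fields contain no embedded spaces.
# df = spark.read.csv(file_path, header=True, schema=schema)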
# Keep the key column plus the two numeric columns, derive their row-wise sum
# as "total", and list the rows with the largest totals first.
df = df.select("Column2", "Column9", "Column10").withColumn("total", df["Column9"] + df["Column10"])
df.sort(df["total"].desc()).show()
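
# Release the session's resources once the job is done.
spark.stop()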
